/* 

****************************************************************************

General Summary:

This file uses several administrative data files from two main sources to construct a sample of 
individuals affected by public housing demolition.

The key output is a file which lists individuals who were affected by public housing demolition.

"list_chdhsid_demo_residents.dta"

****************************************************************************

*/

********************************************************************************
*** Preliminary Setup
********************************************************************************

clear
set more off

global demo  "" 
 // directory for demolition files; must insert CHA building file heres
global raw   ""
 // directory for social assistance case files
global xwalk ""
 // directory for supplemental geocoded IDHS address data  
global temp  ""
 // directory for storing temporary files created within this program

* Toggle which sections of code to run
global step1=0  // Use CHA buildings file and create a list of treat/control buildings
global step2=0  // Use assistance case+address file to create a list of cases relevant for PH demolition
global step3=0  // Create assistance relationship file
global step4=0  // Create assistance case and address file
global step5=0  // Create assistance spell panels (3 types)
global step6=0  // Create an unbalanced panel of assistance receipt -- output is at the person/case/year/month level
global step7=0  // Create a panel of address history for each person actively on assistance -- output is at the person/year/month level
global step8=0  // Create a list of PH-relevant ID #s

* Earliest and latest observed year/month from (1) spell data, (2) relationship data, and (3) address data;
global start_year=1989 // this insures that the beginning of the address/spell file is recorded
global start_month=1
global end_year=2013
global end_month=2

********************************************************************************
*** STEP 1. 
*** Use the raw CHA data on buildings and create treat/control groups
*** Also, add a variable which contains the date of demolition into this file
********************************************************************************

if $step1==1 {

use "${demo}chabuildinglist.dta", clear
 // this is the CHA building file from CHA admin data (same as in Jacob (2004))

** Flag all buildings that will part of the treatment OR control group

* Treatment group
gen treat = .
   
* HORNER TREATMENT 
replace treat=1  if bdg=="HR004" 	
replace treat=1  if bdg=="HR006" 
replace treat=1  if bdg=="HR008" 
replace treat=1  if bdg=="HR009" 
replace treat=1  if bdg=="HR012" 
replace treat=1  if bdg=="HR013" 
replace treat=1  if bdg=="HR023" 
replace treat=1  if bdg=="HR024" 

replace treat=1  if bdg=="HR926" 

* HORNER CONTROL   
replace treat=0  if bdg=="HR003" 
replace treat=0  if bdg=="HR007" 
replace treat=0  if bdg=="HR014" 
replace treat=0  if bdg=="HR015" 
replace treat=0  if bdg=="HR016" 
replace treat=0  if bdg=="HR017" 
replace treat=0  if bdg=="HR018" 
replace treat=0  if bdg=="HR025" 

* MADDEN PARK TREATMENT
replace treat=1  if bdg=="MA010" 	
replace treat=1  if bdg=="MA011"     

* MADDEN PARK CONTROL
replace treat=0  if bdg=="MA009" 

* ROBERT TAYLOR TREATMENT
replace treat=1  if bdg=="RT001"   
replace treat=1  if bdg=="RT004" 
replace treat=1  if bdg=="RT010" 
replace treat=1  if bdg=="RT011" 
*replace treat=1  if bdg=="RT015"  // *the hole 
replace treat=1  if bdg=="RT016" 
replace treat=1  if bdg=="RT017" 
replace treat=1  if bdg=="RT020" 
replace treat=1  if bdg=="RT021" 
*replace treat=1  if bdg=="RT022"  // *the hole 
*replace treat=1  if bdg=="RT023"  // *the hole 
replace treat=1  if bdg=="RT025" 
replace treat=1  if bdg=="RT028"  

* ROBERT TAYLOR CONTROL
replace treat=0  if bdg=="RT002" 	//	*ROBERT TAYLOR 
replace treat=0  if bdg=="RT003" 
replace treat=0  if bdg=="RT005" 
replace treat=0  if bdg=="RT006" 
replace treat=0  if bdg=="RT007" 
replace treat=0  if bdg=="RT009"  
replace treat=0  if bdg=="RT012" 
replace treat=0  if bdg=="RT013" 
replace treat=0  if bdg=="RT014" 
replace treat=0  if bdg=="RT018" 
replace treat=0  if bdg=="RT019"  
replace treat=0  if bdg=="RT024"  
replace treat=0  if bdg=="RT026" 
replace treat=0  if bdg=="RT027"

* WASHINGTON PARK TREATMENT
replace treat=1  if bdg=="WA026" 	
replace treat=1  if bdg=="WA044" 
replace treat=1  if bdg=="WA085" 

* WASHINGTON PARK CONTROL  
replace treat=0  if bdg=="WA035" 	
replace treat=0  if bdg=="WA042" 
replace treat=0  if bdg=="WA065" 
replace treat=0  if bdg=="WA139" 

* WELLS EXTENSION TREATMENT  
replace treat=1  if bdg=="WE001"    //		*WELLS-EXTENSION 
replace treat=1  if bdg=="WE002" 
replace treat=1  if bdg=="WE003" 
replace treat=1  if bdg=="WE006" 

* WELLS EXTENSION CONTROL
replace treat=0  if bdg=="WE004"    //		*WELLS-EXTENSION 
replace treat=0  if bdg=="WE005" 
replace treat=0  if bdg=="WE007" 
replace treat=0  if bdg=="WE008"
replace treat=0  if bdg=="WE009" 
replace treat=0  if bdg=="WE010"

* STATEWAY GARDEN TREATMENT
replace treat=1  if bdg=="SW004" 	

* STATEWAY GARDEN CONTROL
replace treat=0  if bdg=="SW001" 	
replace treat=0  if bdg=="SW003" 	
replace treat=0  if bdg=="SW005" 	
replace treat=0  if bdg=="SW006" 	
replace treat=0  if bdg=="SW007" 	
replace treat=0  if bdg=="SW008" 	
replace treat=0  if bdg=="SW009"  

* ROCKWALL GARDENS TREATMENT
replace treat=1  if bdg=="RC001"   //	*ROCKWELL 
replace treat=1  if bdg=="RC002" 	

* ROCKWALL GARDENS CONTROL  
replace treat=0  if bdg=="RC004" 	//	*ROCKWELL 
replace treat=0  if bdg=="RC006"   

* create demolition date variable (this lines up with Jacob (2004) definition)
 // note: "adate" = annoucement date

gen adate=mdy(1,1,2001) 
replace adate=. 

replace adate=mdy(9,1,1992) if bdg=="AB123"  	// *ABLA 

replace adate=mdy(9,1,1995) if bdg=="AB025" 
replace adate=mdy(9,1,1995) if bdg=="AB043" 
replace adate=mdy(9,1,1995) if bdg=="AB109" 

replace adate=mdy(9,1,1992) if bdg=="CA129"  	// *CABRINI 
replace adate=mdy(9,1,1992) if bdg=="CA123" 
replace adate=mdy(9,1,1992) if bdg=="CA118" 
replace adate=mdy(9,1,1992) if bdg=="CA120" 

replace adate=mdy(9,1,1995) if bdg=="CA097" 
replace adate=mdy(9,1,1995) if bdg=="CA104" 
replace adate=mdy(9,1,1995) if bdg=="CA105" 
replace adate=mdy(9,1,1995) if bdg=="CA106" 
replace adate=mdy(9,1,1995) if bdg=="CA107" 
replace adate=mdy(9,1,1995) if bdg=="HL001" 		// *HILLARD 

replace adate=mdy(9,1,1995) if bdg=="HR004" 		// *HORNER 
replace adate=mdy(9,1,1995) if bdg=="HR006" 
replace adate=mdy(9,1,1995) if bdg=="HR008" 
replace adate=mdy(9,1,1995) if bdg=="HR009" 
replace adate=mdy(9,1,1995) if bdg=="HR012" 
replace adate=mdy(9,1,1995) if bdg=="HR013" 
replace adate=mdy(9,1,1995) if bdg=="HR023" 
replace adate=mdy(9,1,1995) if bdg=="HR024" 

replace adate=mdy(9,1,1995) if bdg=="HR926" 
replace adate=mdy(9,1,1992) if bdg=="HR921"   // *could be 1992 or 93 
replace adate=mdy(9,1,1992) if bdg=="HR920"   // *could be 1992 or 93 

replace adate=mdy(9,1,1998) if bdg=="IC004" 		// *ICKES 
replace adate=mdy(9,1,1995) if bdg=="LN002" 		// *LAWNDALE 
replace adate=mdy(9,1,1995) if bdg=="MA010" 		// *MADDEN 
replace adate=mdy(9,1,1998) if bdg=="MA011" 
replace adate=mdy(9,1,1998) if bdg=="RC001" 		// *ROCKWELL 
replace adate=mdy(9,1,1998) if bdg=="RC002" 	
replace adate=mdy(9,1,1995) if bdg=="RC007" 
replace adate=mdy(9,1,1995) if bdg=="RC008" 

replace adate=mdy(9,1,1995) if bdg=="RT001" 		// *ROBERT TAYLOR 
replace adate=mdy(9,1,1998) if bdg=="RT004" 
replace adate=mdy(9,1,1997) if bdg=="RT008" 
replace adate=mdy(9,1,1998) if bdg=="RT010" 
replace adate=mdy(9,1,1998) if bdg=="RT011" 
replace adate=mdy(9,1,1997) if bdg=="RT015"  // *the hole 
replace adate=mdy(9,1,1998) if bdg=="RT016" 
replace adate=mdy(9,1,1998) if bdg=="RT017" 
replace adate=mdy(9,1,1998) if bdg=="RT020" 
replace adate=mdy(9,1,1998) if bdg=="RT021" 
replace adate=mdy(9,1,1997) if bdg=="RT022"  // *the hole 
replace adate=mdy(9,1,1997) if bdg=="RT023"  // *the hole 
replace adate=mdy(9,1,1998) if bdg=="RT025" 
replace adate=mdy(9,1,1998) if bdg=="RT028" 

replace adate=mdy(9,1,1996) if bdg=="SW004" 		// *STATEWAY 
replace adate=mdy(9,1,1995) if bdg=="WA026" 		// *WASHINGTON PARK 
replace adate=mdy(9,1,1995) if bdg=="WA044" 
replace adate=mdy(9,1,1995) if bdg=="WA085" 

replace adate=mdy(9,1,1993) if bdg=="WA100"  // *could be 1992 or 93 
  
replace adate=mdy(9,1,1995) if bdg=="WD008" 		// *WELLS-DARROW 
replace adate=mdy(9,1,1998) if bdg=="WD009" 	
replace adate=mdy(9,1,1995) if bdg=="WD010" 	
replace adate=mdy(9,1,1995) if bdg=="WD015" 
replace adate=mdy(9,1,1995) if bdg=="WE001" 		// *WELLS-EXTENSION 
replace adate=mdy(9,1,1995) if bdg=="WE002" 
replace adate=mdy(9,1,1995) if bdg=="WE003" 
replace adate=mdy(9,1,1995) if bdg=="WE006" 

* keep treat/control group
drop if treat==.

* insure that treatment group has demo dates
assert adate~=. if treat==1
count

lab data "File with public housing buildings (by street address)"

save "${demo}temp_PH_demolition.dta", replace

}

********************************************************************************
*** STEP 2. 
*** Open the raw social assistance address data and create rough address variables
*** Merge by address variables with the public housing (demolition) data created in Step 1.
*** Note: This step is only necessary to get a narrow list of PH-relevant cases
********************************************************************************

if $step2==1 {
     
gzuse "${raw}caseaddr_200606_201212.dta.gz", clear
  // this file comes from IDHS admin records for social assistance cases in Cook county

! gunzip "${xwalk}xwalk_addresses_pre_geo_spell_full.dta.gz" 
  // this file is an "interemdiate" address file that has some more refined address variables for the raw data
merge n:1 caseaddr casecity casezip using "${xwalk}xwalk_addresses_pre_geo_spell_full.dta"
! gzip "${xwalk}xwalk_addresses_pre_geo_spell_full.dta" 

* filter
keep if _merge==3
keep if state=="IL"
keep if city=="CHICAGO"

drop _merge

** Create variables related to street address (these will be used to merge with the demolition-PH file)

* street number
gen strnum = ""

* create a street number (up to four digits)
gen temp1 = regexs(0) if(regexm(address, "[0-9]"))
gen temp2 = regexs(0) if(regexm(address, "[0-9][0-9]"))
gen temp3 = regexs(0) if(regexm(address, "[0-9][0-9][0-9]"))
gen temp4 = regexs(0) if(regexm(address, "[0-9][0-9][0-9][0-9]"))

replace strnum = temp1
replace strnum = temp2 if (strnum=="" | temp2~="")
replace strnum = temp3 if (strnum=="" | temp3~="")
replace strnum = temp4 if (strnum=="" | temp4~="")

drop temp*

* street direction
gen strdir = ""

replace strdir = "N" if regexm(address, "NORTH")
replace strdir = "S" if regexm(address, "SOUTH")

replace strdir = "E" if regexm(address, "EAST") & strdir==""
replace strdir = "W" if regexm(address, "WEST") & strdir==""

* street type
gen strtype = ""
 
replace strtype = "AVE" if regexm(address, "AVENUE")
replace strtype = "BLVD" if regexm(address, "BOULEVARD")
replace strtype = "PL" if regexm(address, "PLACE")
*replace strtype = "PL" if regexm(address, "PLAZ")
replace strtype = "ST" if regexm(address, "STREET")
replace strtype = "ST" if regexm(address, "STR")

* street names 
gen strname = ""

/* next section of code only fills in street names for streets that DO HAVE demolition-relevant public housing */
/* will use this field, in conjunction with other address pieces from above to do the merge */

replace strname = "36TH" if regexm(address, "36th")  
replace strname = "36TH" if regexm(address, "36TH") 
replace strname = "36TH" if regexm(address, "36 TH ")
replace strname = "36TH" if regexm(address, " 36 ") & strname==""
replace strname = "36TH" if regexm(address, "THIRTYSIX")
replace strname = "36TH" if regexm(address, "THIRTY SIX")
replace strname = "36TH" if regexm(address, "THIRTY SIXTH")
replace strname = "36TH" if regexm(address, "THIRTY-SIXTH")
replace strname = "36TH" if regexm(address, "THIRTYSIXTH")

replace strname = "37TH" if regexm(address, "37th")  
replace strname = "37TH" if regexm(address, "37TH") 
replace strname = "37TH" if regexm(address, "37 TH ")
replace strname = "37TH" if regexm(address, " 37 ") & strname==""
replace strname = "37TH" if regexm(address, "THIRTYSEVEN")
replace strname = "37TH" if regexm(address, "THIRTY SEVEN")
replace strname = "37TH" if regexm(address, "THIRTY SEVENTH")
replace strname = "37TH" if regexm(address, "THIRTY-SEVENTH")
replace strname = "37TH" if regexm(address, "THIRTYSEVENTH")

replace strname = "63RD" if regexm(address, "63rd")  
replace strname = "63RD" if regexm(address, "63RD") 
replace strname = "63RD" if regexm(address, "63 RD ") 
replace strname = "63RD" if regexm(address, " 63 ") & strname==""
replace strname = "63RD" if regexm(address, "SIXTYTHREE")
replace strname = "63RD" if regexm(address, "SIXTY THREE")
replace strname = "63RD" if regexm(address, "SIXTY THIRD")
replace strname = "63RD" if regexm(address, "SIXTY-THIRD")
replace strname = "63RD" if regexm(address, "SIXTYTHIRD")

replace strname = "ADAMS" if regexm(address, "ADAMS")
replace strname = "BOWEN" if regexm(address, "BOWEN")
replace strname = "BROWNING" if regexm(address, "BROWNING")
replace strname = "CALUMET" if regexm(address, "CALUMET")
replace strname = "COTTAGE GROVE" if regexm(address, "COTTAGE GROVE")
replace strname = "ELLIS" if regexm(address, "ELLIS")
replace strname = "EVANS" if regexm(address, "EVANS")
replace strname = "FEDERAL" if regexm(address, "FEDERAL") 
replace strname = "HERMITAGE" if regexm(address, "HERMITAGE") 
replace strname = "HOYNE" if regexm(address, "HOYNE")
replace strname = "JACKSON" if regexm(address, "JACKSON") 
replace strname = "LAKE" if regexm(address, "LAKE")
replace strname = "MONROE" if regexm(address, "MONROE")
replace strname = "PRAIRIE" if regexm(address, "PRAIRIE")
replace strname = "STATE" if regexm(address, "STATE")
replace strname = "WABASH" if regexm(address, "WABASH")
replace strname = "WASHINGTON" if regexm(address, "WASHINGTON")
replace strname = "WOLCOTT" if regexm(address, "WOLCOTT")
replace strname = "WOOD" if regexm(address, "WOOD")

count if strnum~=""
 
*** MANUAL EDITS 

 /* Here, I need to work with some of the addresses manually because street
    type is often not available. So, if "strtype" is missing then, I can't 
	get a merge. 
	
	below, I replace the "type" after I have looked up the street on Google maps
	to verify there are no other "types" for this street name, i.e. "ELLIS DR." VERSUS "ELLIS STREET"

 */

replace strtype = "AVE" if strdir=="N" & strname=="HOYNE" & strtype=="" 
replace strtype = "AVE" if strdir=="S" & strname=="ELLIS" & strtype=="" 
replace strtype = "AVE" if strdir=="E" & strname=="BOWEN" & strtype==""
replace strtype = "AVE" if strdir=="E" & strname=="BROWNING" & strtype==""        
replace strtype = "BLVD" if strdir=="W" & strname=="JACKSON" & strtype==""
replace strtype = "ST" if strdir=="W" & strname=="MONROE" & strtype==""
replace strtype = "ST" if strdir=="W" & strname=="ADAMS" & strtype==""
replace strtype = "ST" if strdir=="S" & strname=="FEDERAL" & strtype==""
replace strtype = "ST" if strdir=="S" & strname=="STATE" & strtype==""
replace strtype = "ST" if strdir=="W" & strname=="LAKE" & strtype==""
 
replace strtype = "ST" if strdir=="N" & strname=="WOOD" & strtype=="" 
replace strtype = "PL" if strdir=="E" & strname=="36TH" & strtype=="" 
replace strtype = "ST" if strdir=="E" & strname=="63RD" & strtype==""
replace strtype = "AVE" if strdir=="S" & strname=="CALUMET" & strtype=="" 
replace strtype = "AVE" if strdir=="S" & strname=="COTTAGE GROVE" & strtype==""
replace strtype = "AVE" if strdir=="S" & strname=="EVANS" & strtype==""
replace strtype = "AVE" if strdir=="S" & strname=="PRAIRIE" & strtype==""
replace strtype = "AVE" if strdir=="N" & strname=="HERMITAGE" & strtype==""
replace strtype = "AVE" if strdir=="S" & strname=="WABASH" & strtype==""
 
** Merge with public housing demo file created in Step 1
merge n:1 strnum strname strdir strtype using "${demo}temp_PH_demolition.dta"
 // this file is created above
drop if _merge==2

format pull_date %tdDD_Mon_CCYY
format adate %tdDD_Mon_CCYY
 
* keep the relevant public housing observations
keep if treat~=.
 
* keep relevant variables (will need this for subsequent to store information on treat/adate)
keep CH_DPA_CASEID pull_date address strnum strname strdir strtype treat adate

assert address~=""

gen year=year(pull_date)
gen month=month(pull_date)

drop pull_date 

* filter to the relevant time period (set above)
drop if year<$start_year
drop if year>$end_year

* store list of relevant cases (will use this to cut down the size of the data)
keep CH_DPA_CASEID 
duplicates drop

save "${demo}temp_case_id_list.dta", replace

}

********************************************************************************
*** STEP 3. 
*** Make an individual-level panel of relationship status to the grantee (recipient) observations
********************************************************************************

if $step3==1 {

gzuse "${raw}relation_200606_201212.dta.gz", clear
 // relation file for the individuals on social assistance
 
* Store unique chdhsids-case IDs for later use
preserve
keep chdhsid_201212 CH_DPA_CASEID
duplicates drop
save "${temp}relationship_IDs.dta", replace
restore

keep chdhsid_201212 CH_DPA_CASEID reltogte pull_date
gen year=year(pull_date)
gen month=month(pull_date)
drop pull_date
keep if inlist(reltogte,"01","02","03","04","05","06","07","08","09") | inlist(reltogte,"10","11","25","81","82")
duplicates drop
	
* Replace status with modal value if different observations appear in the same month
gsort +chdhsid_201212 +year +month
by chdhsid_201212: egen mode_reltogte=mode(reltogte)
by chdhsid_201212 year month: egen nvals_reltogte=nvals(reltogte)
replace reltogte=mode_reltogte if nvals_reltogte>1 & mode_reltogte!=""
drop nvals_reltogte mode_reltogte
	
* Pick one status randomly when no modal value exists but different observations appear in the same month
set seed 1234
gen rnum=runiform()
gsort +chdhsid_201212 +year +month +rnum
by chdhsid_201212 year month: replace reltogte=reltogte[1]
drop rnum
	
gen grantee=(reltogte=="82")
gen payee=(reltogte=="25")
gen child=inlist(reltogte,"01","02","03","04","05","06","07","08","09") | inlist(reltogte,"10","11")
gen adult=(reltogte=="81")
collapse (max) grantee payee child adult, by(chdhsid_201212 CH_DPA_CASEID year month)
		
gen status_change=1
save "${temp}temp_relationship.dta", replace 

}

********************************************************************************
*** STEP 4. 
*** Make a case-address file (unbalanced) 
********************************************************************************

if $step4==1 {
 
gzuse "${raw}caseaddr_200606_201212.dta.gz", clear 
 // this file comes from IDHS admin records for social assistance cases in Cook county
  
keep CH_DPA_CASEID caseaddr casecity casezip pull_date
gen year=year(pull_date)
gen month=month(pull_date)
drop pull_date
duplicates drop
	
* Merge on address ID, coordinate ID, and tract information
!gunzip "${xwalk}xwalk_addresses_pre_geo_spell_full.dta.gz"
merge n:1 caseaddr casecity casezip using "${xwalk}xwalk_addresses_pre_geo_spell_full.dta"
 // this file is an "interemdiate" address file that has some more refined address variables for the raw data
keep if _merge==3
!gzip "${xwalk}xwalk_addresses_pre_geo_spell_full.dta"
		
keep CH_DPA_CASEID year month address state city zip
drop if address==""
	
preserve
keep CH_DPA_CASEID year month address state city zip
duplicates drop
gen address_change=1 /* This is used to help fill in the panel */
save "${temp}temp_address.dta", replace
!gzip "${temp}temp_address.dta"
restore

}
 

********************************************************************************
*** STEP 5. 
*** Make person-case-month panels for receipt of different types of assistance
******************************************************************************** 

if $step5==1 {

foreach type in foodstamp grant medicaid {

	di "Preparing `type' panel"
	
	gzuse "${raw}`type'_spells_200606_201212.dta.gz", clear
	 // these are IDHS admin records on assistance receipt

	duplicates drop
	
	* Store unique chdhsids-case IDs for later use
	preserve
	keep chdhsid_201212 CH_DPA_CASEID
	duplicates drop
	save "${temp}`type'_IDs.dta", replace /* these files will be used to start building the panel */
	restore
	
	* Create date variables from case spell start/end dates
	foreach k in start end {
		format `k'_date %td
		gen `k'_year=year(`k'_date)
		gen `k'_month=month(`k'_date)
		gen `k'_day=day(`k'_date)
	}
	
	* Create person-case-month panel
	gen row_id=_n
	gen n_expand=12*(end_year-start_year)+12
	expand n_expand /* expands data based on the duration of the spell */
	bysort row_id: gen year=start_year+int((_n-1)/12)
	bysort row_id year: gen month=_n
	
	* Create binary indicator for receipt of assistance
	gen `type'=0
	replace `type'=1 if year==start_year & month==start_month
	replace `type'=1 if year==end_year & month==end_month
	replace `type'=1 if end_year==start_year & inrange(month,start_month,end_month)
	replace `type'=1 if year==start_year & end_year>start_year & month>start_month
	replace `type'=1 if year==end_year & end_year>start_year & month<end_month
	replace `type'=1 if year>start_year & year<end_year
	drop if `type'==0 /* drop observations (months) when the spell was not active */
	
	keep chdhsid_201212 CH_DPA_CASEID year month `type'* // note: cannot save the start or end dates as is b/c there will be conflict between the 3 assistance files
	duplicates drop
			
	gzsave "${temp}temp_`type'.dta.gz", replace
 } // close assistance type loop

}


********************************************************************************
*** STEP 6. 
*** Make person-case-month (unbalanced) panel
******************************************************************************** 

if $step6==1 {

* Start with all observed chdhsids-case ID pairs
drop _all

foreach type in foodstamp grant medicaid relationship {
	append using "${temp}`type'_IDs.dta", force /* all IDs are combined together */
}

duplicates drop

* Merge with the initial list of public-housing relevant cases
merge n:1 CH_DPA_CASEID using "${demo}temp_case_id_list.dta", keep(1 3)
 /* the key here is the cases are connected to individual IDs; cases in the using file are filtered to addresses that are PH */
 // this file is created above

gen ph = (_merge==3)

* Within a person-ID, find out if they are associated with a public housing case
bysort chdhsid_201212: egen ph_ever = max(ph)

* filter/flatten data to just public-housing individuals -- this will help with the panel construction
keep if ph_ever==1
drop ph_ever ph

 /* Note: The append/stack code here will stack IDs + cases on top of each other. Next, this will get expanded */

compress

* Create panel structure
local n_expand=12*($end_year-$start_year)+12
expand `n_expand'
bysort chdhsid_201212 CH_DPA_CASEID: gen year=$start_year+int((_n-1)/12)
bysort chdhsid_201212 CH_DPA_CASEID year: gen month=_n
drop if year==$start_year & month<$start_month
drop if year==$end_year & month>$end_month

* Merge on aid receipt data (this will obtain start and end dates)
foreach type in foodstamp grant medicaid {
	!gunzip "${temp}temp_`type'.dta.gz"
	mmerge chdhsid_201212 CH_DPA_CASEID year month using "${temp}temp_`type'.dta", t(1:1) unmatched(master)
	 // each of these files was created above
	!gzip "${temp}temp_`type'.dta"
	drop _merge
	replace `type'=0 if missing(`type')
}

* Create assistance measure (for any type of service)
egen temp=rowtotal(foodstamp grant medicaid)
gen assistance=temp>0
drop temp
	
* Merge on relationship status
mmerge chdhsid_201212 CH_DPA_CASEID year month using "${temp}temp_relationship.dta", t(1:1) unmatched(master)
replace status_change=0 if missing(status_change)
drop _merge

* Merge on address (created above)
  /* note that the address data contains the addresses for all types of assistance cases (foodstamps versus AFDC/TANF, etc.) */
  /* note that address data which is defined at the "pull_date" time variable, should occur at some point within SOME (3 kinds) of spell */
!gunzip "${temp}temp_address.dta.gz"
mmerge CH_DPA_CASEID year month using "${temp}temp_address.dta", t(n:1) missing(value) unmatched(master)
!gzip "${temp}temp_address.dta"
replace address_change=0 if missing(address_change)

* Drop records BEFORE the start of the first spell, relationship status change, or address change
gsort +chdhsid_201212 +CH_DPA_CASEID +year +month
by chdhsid_201212 CH_DPA_CASEID: gen temp1=sum(assistance)
by chdhsid_201212 CH_DPA_CASEID: gen temp2=sum(status_change)
by chdhsid_201212 CH_DPA_CASEID: gen temp3=sum(address_change)
drop if temp1==0 & temp2==0 & temp3==0

* Drop records AFTER the end of the last spell, relationship status change, or address change
by chdhsid_201212 CH_DPA_CASEID: egen temp4=total(assistance)
by chdhsid_201212 CH_DPA_CASEID: egen temp5=total(status_change)
by chdhsid_201212 CH_DPA_CASEID: egen temp6=total(address_change)
drop if assistance==0 & status_change==0 & address_change==0 & temp4==temp1 & temp5==temp2 & temp6==temp3

drop temp*

drop _merge
	
/*
Fill down a person's latest relationship status from any case across all months when they are active,
until reaching a new relationship status.

When an individual is not active, we don't know if their status has changed.
When an individual is active, we assume their last reported status (on any case) is still accurate.
*/
	
gsort +chdhsid_201212 +year +month -status_change
foreach k of varlist grantee payee child adult {
	by chdhsid_201212: carryforward `k', replace
	replace `k'=. if assistance==0 & status_change==0
}

* Fill down non-missing address data for each case during active months
gsort +CH_DPA_CASEID +year +month -address_change
by CH_DPA_CASEID: carryforward address, replace
by CH_DPA_CASEID: carryforward state, replace
by CH_DPA_CASEID: carryforward city, replace
by CH_DPA_CASEID: carryforward zip, replace

replace address="" if assistance==0 & address_change==0
replace state="" if assistance==0 & address_change==0
replace city="" if assistance==0 & address_change==0
replace zip="" if assistance==0 & address_change==0

* Remove observations where individuals receive no assistance, update to their relationship status, or address
drop if assistance==0

cap drop _merge
compress

gzsave "${temp}spell_person_case_month_panel_nonexp_EC_v2.dta.gz", replace
	
}

********************************************************************************
*** STEP 7. 
*** Create a panel of address histories / Merge on public housing building info
********************************************************************************

if $step7==1 {

* Step 7.1: Create an address history at the person/year/month level (flattens file from the case level)

gzuse "${temp}spell_person_case_month_panel_nonexp_EC_v2.dta.gz", clear
 // this is an unbalanced panel at the case/address/year/month level

duplicates report chdhsid_201212 CH_DPA_CASEID year month // unique at this level
count if address=="" 
 
* flag individuals
egen tag = tag(chdhsid_201212)

* Total # of cases in a year/month for any person
bysort chdhsid_201212 year month: egen N_cases=total(1==1)
 /* no duplicates at the person/case/year/month level; duplicates at the person/year/month level are multiple cases */
 
* Store first non-missing address within a person/year/month 
gsort +chdhsid +year +month -CH_DPA 
*by chdhsid_201212 year month: egen temp1=first(address) // first option does not fill in on missing rows
by chdhsid_201212 year month: egen temp1=mode(address), min

* Row Number for this first non-missing address
by chdhsid_201212 year month: gen temp2=_n if address==temp1 & !missing(address)
by chdhsid_201212 year month: egen temp3=min(temp2)

* Replace missings address cases with the first non-missing
by chdhsid_201212 year month: replace city=city[temp3] if address==""
by chdhsid_201212 year month: replace state=state[temp3] if address==""
by chdhsid_201212 year month: replace zip=zip[temp3] if address==""
by chdhsid_201212 year month: replace address=address[temp3] if address==""

* Replace conflicting city/state (but NOT street) with the first non-missing addresses state and city (this only applies to a few minor cases)
by chdhsid_201212 year month: replace city=city[temp3] if address==temp1
by chdhsid_201212 year month: replace state=state[temp3] if address==temp1
by chdhsid_201212 year month: replace zip=zip[temp3] if address==temp1

* Flag any case where an address does not match the first non-missing address
by chdhsid_201212 year month: gen temp4 = (address~=temp1) 

* Flag year/months where there is a conflict for any person
by chdhsid_201212 year month: egen N_addr_prb = total(temp4)

*** Compile address history

* set to missing address-related fields if there are conflicts at the year/month level
replace address="" if N_addr_prb>0
replace state="" if N_addr_prb>0
replace city="" if N_addr_prb>0
replace zip="" if N_addr_prb>0 

* flatten
keep chdhsid_201212 year month address state city zip N_addr_prb N_cases
duplicates drop

duplicates tag chdhsid_201212 year month, gen(flag)
assert flag==0

drop flag

** Create variables related to street address (these will be used to merge with the demolition-PH file)
 /* Note: This is the same code/application used above in Step 2. */

* street number
gen strnum = ""

* create a street number (up to four digits)
gen temp1 = regexs(0) if(regexm(address, "[0-9]"))
gen temp2 = regexs(0) if(regexm(address, "[0-9][0-9]"))
gen temp3 = regexs(0) if(regexm(address, "[0-9][0-9][0-9]"))
gen temp4 = regexs(0) if(regexm(address, "[0-9][0-9][0-9][0-9]"))

replace strnum = temp1
replace strnum = temp2 if (strnum=="" | temp2~="")
replace strnum = temp3 if (strnum=="" | temp3~="")
replace strnum = temp4 if (strnum=="" | temp4~="")

drop temp*

* street direction
gen strdir = ""

replace strdir = "N" if regexm(address, "NORTH")
replace strdir = "S" if regexm(address, "SOUTH")

replace strdir = "E" if regexm(address, "EAST") & strdir==""
replace strdir = "W" if regexm(address, "WEST") & strdir==""

* street type
gen strtype = ""
 
replace strtype = "AVE" if regexm(address, "AVENUE")
replace strtype = "BLVD" if regexm(address, "BOULEVARD")
replace strtype = "PL" if regexm(address, "PLACE")
*replace strtype = "PL" if regexm(address, "PLAZ")
replace strtype = "ST" if regexm(address, "STREET")
replace strtype = "ST" if regexm(address, "STR")

* street names 
gen strname = ""

/* next section of code only fills in street names for streets that DO HAVE demolition-relevant public housing */
/* will use this field, in conjunction with other address pieces from above to do the merge */

replace strname = "36TH" if regexm(address, "36th")  
replace strname = "36TH" if regexm(address, "36TH") 
replace strname = "36TH" if regexm(address, "36 TH ")
replace strname = "36TH" if regexm(address, " 36 ") & strname==""
replace strname = "36TH" if regexm(address, "THIRTYSIX")
replace strname = "36TH" if regexm(address, "THIRTY SIX")
replace strname = "36TH" if regexm(address, "THIRTY SIXTH")
replace strname = "36TH" if regexm(address, "THIRTY-SIXTH")
replace strname = "36TH" if regexm(address, "THIRTYSIXTH")

replace strname = "37TH" if regexm(address, "37th")  
replace strname = "37TH" if regexm(address, "37TH") 
replace strname = "37TH" if regexm(address, "37 TH ")
replace strname = "37TH" if regexm(address, " 37 ") & strname==""
replace strname = "37TH" if regexm(address, "THIRTYSEVEN")
replace strname = "37TH" if regexm(address, "THIRTY SEVEN")
replace strname = "37TH" if regexm(address, "THIRTY SEVENTH")
replace strname = "37TH" if regexm(address, "THIRTY-SEVENTH")
replace strname = "37TH" if regexm(address, "THIRTYSEVENTH")

replace strname = "63RD" if regexm(address, "63rd")  
replace strname = "63RD" if regexm(address, "63RD") 
replace strname = "63RD" if regexm(address, "63 RD ") 
replace strname = "63RD" if regexm(address, " 63 ") & strname==""
replace strname = "63RD" if regexm(address, "SIXTYTHREE")
replace strname = "63RD" if regexm(address, "SIXTY THREE")
replace strname = "63RD" if regexm(address, "SIXTY THIRD")
replace strname = "63RD" if regexm(address, "SIXTY-THIRD")
replace strname = "63RD" if regexm(address, "SIXTYTHIRD")

replace strname = "ADAMS" if regexm(address, "ADAMS")
replace strname = "BOWEN" if regexm(address, "BOWEN")
replace strname = "BROWNING" if regexm(address, "BROWNING")
replace strname = "CALUMET" if regexm(address, "CALUMET")
replace strname = "COTTAGE GROVE" if regexm(address, "COTTAGE GROVE")
replace strname = "ELLIS" if regexm(address, "ELLIS")
replace strname = "EVANS" if regexm(address, "EVANS")
replace strname = "FEDERAL" if regexm(address, "FEDERAL") 
replace strname = "HERMITAGE" if regexm(address, "HERMITAGE") 
replace strname = "HOYNE" if regexm(address, "HOYNE")
replace strname = "JACKSON" if regexm(address, "JACKSON") 
replace strname = "LAKE" if regexm(address, "LAKE")
replace strname = "MONROE" if regexm(address, "MONROE")
replace strname = "PRAIRIE" if regexm(address, "PRAIRIE")
replace strname = "STATE" if regexm(address, "STATE")
replace strname = "WABASH" if regexm(address, "WABASH")
replace strname = "WASHINGTON" if regexm(address, "WASHINGTON")
replace strname = "WOLCOTT" if regexm(address, "WOLCOTT")
replace strname = "WOOD" if regexm(address, "WOOD")
replace strname = "" if regexm(address, "LOCKWOOD") 

*** MANUAL EDITS 

 /* Here, I need to work with some of the addresses manually because street
    type is often not available. So, if "strtype" is missing then, I can't 
	get a merge. 

    below, I replace the "type" after I have looked up the street on Google maps to verify there are no other "types" for this street name, i.e. "ELLIS DR." VERSUS "ELLIS STREET"	
*/

replace strtype = "AVE" if strdir=="N" & strname=="HOYNE" & strtype=="" 
replace strtype = "AVE" if strdir=="S" & strname=="ELLIS" & strtype=="" 
replace strtype = "AVE" if strdir=="E" & strname=="BOWEN" & strtype==""
replace strtype = "AVE" if strdir=="E" & strname=="BROWNING" & strtype==""        
replace strtype = "BLVD" if strdir=="W" & strname=="JACKSON" & strtype==""
replace strtype = "ST" if strdir=="W" & strname=="MONROE" & strtype==""
replace strtype = "ST" if strdir=="W" & strname=="ADAMS" & strtype==""
replace strtype = "ST" if strdir=="S" & strname=="FEDERAL" & strtype==""
replace strtype = "ST" if strdir=="S" & strname=="STATE" & strtype==""
replace strtype = "ST" if strdir=="W" & strname=="LAKE" & strtype==""
 
replace strtype = "ST" if strdir=="N" & strname=="WOOD" & strtype=="" 
replace strtype = "PL" if strdir=="E" & strname=="36TH" & strtype=="" // this could be street or place, but they are both part of the same demo group
replace strtype = "ST" if strdir=="E" & strname=="63RD" & strtype==""
replace strtype = "AVE" if strdir=="S" & strname=="CALUMET" & strtype=="" 
replace strtype = "AVE" if strdir=="S" & strname=="COTTAGE GROVE" & strtype==""
replace strtype = "AVE" if strdir=="S" & strname=="EVANS" & strtype==""
replace strtype = "AVE" if strdir=="S" & strname=="PRAIRIE" & strtype==""
replace strtype = "AVE" if strdir=="N" & strname=="HERMITAGE" & strtype==""
replace strtype = "AVE" if strdir=="S" & strname=="WABASH" & strtype==""
 
** Merge with public housing demo file (created above)
merge n:1 strnum strname strdir strtype using "${demo}temp_PH_demolition.dta", keep(1 3) keepusing(treat adate bdg)

* check
list chdhsid year month address str* treat adate bdg if chdhsid==29423

format adate %tdDD_Mon_CCYY

* flag individuals
egen tag = tag(chdhsid_201212)

* address status
gen ph = (_merge==3)
bysort chdhsid_201212: egen ph_ever = max(ph)

* ever in a treated building
gen temp1 = (treat==1)
bysort chdhsid_201212: egen tr_building_ever = max(temp1)

* ever in a control building
gen temp2 = (treat==0)
bysort chdhsid_201212: egen ctrl_building_ever = max(temp2)

gen demo_year = year(adate)
 /* adate is the announcement date */
 
assert tr_building_ever==1 if demo_year~=.

drop *_building_ever temp* str* _merge

gen treated = (year==demo_year)

bysort chdhsid_201212: egen treated_ever = max(treated)

sort chdhsid year month

* Drop any person who is NOT in relevant public housing ever (as determined by the criteria above)
drop if ph_ever==0
 
drop ph ph_ever treated_ever treated demo_year tag
 
* save
lab data "Monthly address panel for demo PH sample. ID must be on assistance in each month."

gzsave "${temp}PH_sample_month_address_nonexp_EC_v3.dta.gz", replace

}

********************************************************************************
*** STEP 8. 
*** Create project group variable / Save a list of PH-housing relevant IDs
********************************************************************************

if $step8==1 {

gzuse "${temp}PH_sample_month_address_nonexp_EC_v3.dta.gz", clear

** Determine project group status in the address panel
   gen proj_grp=""

* HORNER VALUE FOR PROJECT GROUP 
   replace proj_grp="HR 1995"  if bdg=="HR004" & year==1995 	
   replace proj_grp="HR 1995"  if bdg=="HR006" & year==1995
   replace proj_grp="HR 1995"  if bdg=="HR008" & year==1995 
   replace proj_grp="HR 1995"  if bdg=="HR009" & year==1995 
   replace proj_grp="HR 1995"  if bdg=="HR012" & year==1995 
   replace proj_grp="HR 1995"  if bdg=="HR013" & year==1995 
   replace proj_grp="HR 1995"  if bdg=="HR023" & year==1995 
   replace proj_grp="HR 1995"  if bdg=="HR024" & year==1995  
   replace proj_grp="HR 1995"  if bdg=="HR926" & year==1995
 
   replace proj_grp="HR 1995"  if bdg=="HR003" & year==1995  
   replace proj_grp="HR 1995"  if bdg=="HR007" & year==1995  
   replace proj_grp="HR 1995"  if bdg=="HR014" & year==1995  
   replace proj_grp="HR 1995"  if bdg=="HR015" & year==1995  
   replace proj_grp="HR 1995"  if bdg=="HR016" & year==1995  
   replace proj_grp="HR 1995"  if bdg=="HR017" & year==1995  
   replace proj_grp="HR 1995"  if bdg=="HR018" & year==1995 
   replace proj_grp="HR 1995"  if bdg=="HR025" & year==1995  

* MADDEN PARK PROJECT GROUP
   replace proj_grp="MA 1995"  if bdg=="MA010" & year==1995 	
   replace proj_grp="MA 1998"  if bdg=="MA011" & year==1998  
   replace proj_grp="MA 1995"  if bdg=="MA009" & year==1995
   replace proj_grp="MA 1998"  if bdg=="MA009" & year==1998
   
* ROBERT TAYLOR PROJECT GROUP
   replace proj_grp="RT 1995"  if bdg=="RT001" & year==1995 
   
   replace proj_grp="RT 1998"  if bdg=="RT004" & year==1998 
   replace proj_grp="RT 1998"  if bdg=="RT010" & year==1998 
   replace proj_grp="RT 1998"  if bdg=="RT011" & year==1998 
   replace proj_grp="RT 1998"  if bdg=="RT016" & year==1998 
   replace proj_grp="RT 1998"  if bdg=="RT017" & year==1998 
   replace proj_grp="RT 1998"  if bdg=="RT020" & year==1998 
   replace proj_grp="RT 1998"  if bdg=="RT021" & year==1998 
   replace proj_grp="RT 1998"  if bdg=="RT025" & year==1998 
   replace proj_grp="RT 1998"  if bdg=="RT028" & year==1998 
   
   replace proj_grp="RT 1995"  if bdg=="RT002" & year==1995
   replace proj_grp="RT 1998"  if bdg=="RT002" & year==1998
   
   replace proj_grp="RT 1995"  if bdg=="RT003" & year==1995
   
   replace proj_grp="RT 1995"  if bdg=="RT005" & year==1995
   replace proj_grp="RT 1998"  if bdg=="RT005" & year==1998
   
   replace proj_grp="RT 1995"  if bdg=="RT006" & year==1995 
   replace proj_grp="RT 1995"  if bdg=="RT007" & year==1995
   
   replace proj_grp="RT 1995"  if bdg=="RT009" & year==1995
   replace proj_grp="RT 1998"  if bdg=="RT009" & year==1998
   
   replace proj_grp="RT 1995"  if bdg=="RT012" & year==1995 
   replace proj_grp="RT 1998"  if bdg=="RT012" & year==1998 
   
   replace proj_grp="RT 1995"  if bdg=="RT013" & year==1995 
   replace proj_grp="RT 1995"  if bdg=="RT014" & year==1995
   
   replace proj_grp="RT 1995"  if bdg=="RT018" & year==1995 
   replace proj_grp="RT 1998"  if bdg=="RT018" & year==1998
   
   replace proj_grp="RT 1995"  if bdg=="RT019" & year==1995 
   replace proj_grp="RT 1995"  if bdg=="RT024" & year==1995  
   replace proj_grp="RT 1995"  if bdg=="RT026" & year==1995 
   replace proj_grp="RT 1995"  if bdg=="RT027" & year==1995 

* WASHINGTON PARK PROJECT GROUP  
   replace proj_grp="WA 1995"  if bdg=="WA026" & year==1995 	 
   replace proj_grp="WA 1995"  if bdg=="WA044" & year==1995 
   replace proj_grp="WA 1995"  if bdg=="WA085" & year==1995 
   
   replace proj_grp="WA 1995"  if bdg=="WA035" & year==1995 	
   replace proj_grp="WA 1995"  if bdg=="WA042" & year==1995 
   replace proj_grp="WA 1995"  if bdg=="WA065" & year==1995 
   replace proj_grp="WA 1995"  if bdg=="WA139" & year==1995 

* WELLS EXTENSION PROJECT GROUP
   replace proj_grp="WE 1995"  if bdg=="WE001" & year==1995   
   replace proj_grp="WE 1995"  if bdg=="WE002" & year==1995    
   replace proj_grp="WE 1995"  if bdg=="WE003" & year==1995    
   replace proj_grp="WE 1995"  if bdg=="WE006" & year==1995    
   
   replace proj_grp="WE 1995"  if bdg=="WE004" & year==1995       
   replace proj_grp="WE 1995"  if bdg=="WE005" & year==1995    
   replace proj_grp="WE 1995"  if bdg=="WE007" & year==1995    
   replace proj_grp="WE 1995"  if bdg=="WE008" & year==1995   
   replace proj_grp="WE 1995"  if bdg=="WE009" & year==1995    
   replace proj_grp="WE 1995"  if bdg=="WE010" & year==1995   

 * STATEWAY GARDEN PROJECT GROUP
   replace proj_grp="SG 1996"  if bdg=="SW004" & year==1996
   
   replace proj_grp="SG 1996"  if bdg=="SW001" & year==1996  	
   replace proj_grp="SG 1996"  if bdg=="SW003" & year==1996 	
   replace proj_grp="SG 1996"  if bdg=="SW005" & year==1996 	
   replace proj_grp="SG 1996"  if bdg=="SW006" & year==1996 	
   replace proj_grp="SG 1996"  if bdg=="SW007" & year==1996 	
   replace proj_grp="SG 1996"  if bdg=="SW008" & year==1996 	
   replace proj_grp="SG 1996"  if bdg=="SW009" & year==1996 

* ROCKWALL GARDENS PROJECT GROUP
   replace proj_grp="RC 1998"  if bdg=="RC001" & year==1998
   replace proj_grp="RC 1998"  if bdg=="RC002" & year==1998
   replace proj_grp="RC 1998"  if bdg=="RC004" & year==1998
   replace proj_grp="RC 1998"  if bdg=="RC006" & year==1998
   
* data checks
assert treat~=. if proj_grp~=""
assert adate~=. if proj_grp~="" & treat==1

* flag treated
gen demo_year = year(adate)
gen treated = (demo_year==year)

* check
assert proj_grp~="" if treated==1

keep if proj_grp~=""
keep chdhsid_201212 proj_grp bdg treat adate address state city zip
duplicates drop chdhsid_201212 proj_grp bdg treat adate, force

* save
lab data "List of IDs (persons) living in PH at the time of the demolition."

save "${demo}list_chdhsid_demo_residents.dta", replace

}




